# BeautifulSoup crawler: fetch the Top-100 movie names from Maoyan (maoyan.com/board/4), parallelised with a thread pool

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

# Session cookies captured from a real browser visit; Maoyan's anti-bot layer
# rejects requests without them.
# NOTE(review): these values are session-bound and will expire — recapture them
# from the browser dev tools when requests start returning verification pages.
# The original literal listed '__mta' twice; a dict literal silently keeps only
# the last value, so the dead first entry has been removed.
cookies = {
    '_lxsdk_cuid': '193d7c85914c8-067df6ac552017-26011851-144000-193d7c85914c8',
    '_ga': 'GA1.1.1747353465.1734492052',
    'uuid_n_v': 'v1',
    'uuid': '1288E9E0BD0A11EF857DAF72E470C7F5EEAE9DD7ECC04F17A1519C38BB19816C',
    '_csrf': '6817e04ddcfda0bc049410995e471c735db79f22a20adac267b0ac030d58cf78',
    'Hm_lvt_e0bacf12e04a7bd88ddbd9c74ef2b533': '1734503644',
    'HMACCOUNT': 'B2C21EEE3F1F23B1',
    '_lx_utm': 'utm_source%3DBaidu%26utm_medium%3Dorganic',
    '_lxsdk': '1288E9E0BD0A11EF857DAF72E470C7F5EEAE9DD7ECC04F17A1519C38BB19816C',
    '__mta': '45528731.1734504172431.1734504172431.1734504172431.1',
    'Hm_lpvt_e0bacf12e04a7bd88ddbd9c74ef2b533': '1734504175',
    '_ga_WN80P4PSY7': 'GS1.1.1734502302.2.1.1734504177.0.0.0',
    '_lxsdk_s': '193d864befb-d9a-f91-b82%7C%7C421',
}

# Browser-mimicking request headers (Chrome 131 on Windows) so the scrape looks
# like ordinary navigation.  Cookies are passed separately via the `cookies`
# dict, so no 'Cookie' header is set here (a stale commented-out copy of the
# raw cookie string has been removed as dead code).
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Referer': 'https://www.maoyan.com/',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}


def crawl(id):
    """Fetch one page of the Maoyan Top-100 board and return its movie titles.

    Args:
        id: pagination offset (0, 10, 20, ... 90), sent as the ``offset``
            query parameter; each page lists 10 movies.

    Returns:
        list[str]: the movie titles found on that page (empty if the markup
        is missing, e.g. when an anti-bot verification page is served).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    params = {
        # NOTE(review): timeStamp/signKey were captured from one browser
        # session — presumably an anti-bot signature; confirm whether the
        # server still accepts stale values.
        'timeStamp': '1734504174892',
        'channelId': '40011',
        'index': '7',
        'signKey': 'be1c16a34bc1cb7dcdcdbf7f58b9e9d4',
        'sVersion': '1',
        'webdriver': 'false',
        'offset': id,
    }
    response = requests.get(
        'https://www.maoyan.com/board/4',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=10,  # don't let a stalled response hang a worker thread forever
    )
    response.raise_for_status()  # fail loudly instead of parsing an error page
    # Each title is rendered as <p class="name"><a>title</a></p>.
    soup = BeautifulSoup(response.text, 'html.parser')
    return [tag.find('a').text for tag in soup.find_all('p', class_='name')]



# Script entry point: fan the 10 board pages (offsets 0, 10, ..., 90) out
# across a small thread pool and append every title to the output file.
if __name__ == '__main__':
    offsets = list(range(0, 100, 10))
    # `with` guarantees the pool is shut down and the file is flushed/closed
    # even if a worker raises (the original leaked both).
    with ThreadPoolExecutor(max_workers=3) as executor, \
            open('maoyan_movie_name2.txt', 'a', encoding='utf-8') as file:
        # executor.map preserves input order, so titles land page by page.
        for page_titles in executor.map(crawl, offsets):
            for movie_name in page_titles:
                file.write(movie_name + '\n')